## *************************************
##          B4 GAM Regressions       
##         Hao Zhang & Ye Zhang
##              2025/7/24
## *************************************

# load packages
library(lfe)
library(stargazer)
library(tidyverse)
library(mgcv)
library(cenGAM)

# load data
rm(list = ls())
load("../analysis data/firm analysis.RData")

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# run GAM
set.seed(02139)
gam.fit1 <- gam(insurance_dummy ~ s(gdp_pressure) + ihs(employment) + 
                  ihs(wage) + ihs(output1) + ihs(profit) +  
                  log(FDI + 1) + gdpgrowth + log(gdp/population) +
                  ihs(deficit/gdp) +  ihs(capacity) + tax + 
                  factor(year) + factor(educ) + factor(ethnicity)  + factor(gender) + 
                  factor(begin) + factor(connection) + factor(seq) + factor(citycode), data = firm)

firm1 <- firm %>% filter(rate1 > 0)
set.seed(02139)
gam.fit2 <- gam(log(rate1) ~ s(gdp_pressure) + ihs(employment) + 
                  ihs(wage) + ihs(output1) + ihs(profit) +  
                  log(FDI + 1) + gdpgrowth + log(gdp/population) +
                  ihs(deficit/gdp) +  ihs(capacity) + tax + 
                  factor(year) + factor(educ) + factor(ethnicity)  + factor(gender) + 
                  factor(begin) + factor(connection) + factor(seq) + factor(citycode), data = firm1)

set.seed(02139)
gam.fit3 <- gam(medical_dummy ~ s(gdp_pressure) + ihs(employment) + 
                  ihs(wage) + ihs(output1) + ihs(profit) +  
                  log(FDI + 1) + gdpgrowth + log(gdp/population) +
                  ihs(deficit/gdp) +  ihs(capacity) + tax + 
                  factor(year) + factor(educ) + factor(ethnicity)  + factor(gender) + 
                  factor(begin) + factor(connection) + factor(seq) + factor(citycode), data = firm)

firm1 <- firm %>% filter(rate2 > 0)
set.seed(02139)
gam.fit4 <- gam(log(rate2) ~ s(gdp_pressure) + ihs(employment) + 
                  ihs(wage) + ihs(output1) + ihs(profit) +  
                  log(FDI + 1) + gdpgrowth + log(gdp/population) +
                  ihs(deficit/gdp) +  ihs(capacity) + tax + 
                  factor(year) + factor(educ) + factor(ethnicity)  + factor(gender) + 
                  factor(begin) + factor(connection) + factor(seq) + factor(citycode), data = firm1)

# combine two plots
result1 <- matrix(NA, nrow = 100, ncol = 4)
result2 <- matrix(NA, nrow = 100, ncol = 4)
result3 <- matrix(NA, nrow = 100, ncol = 4)
result4 <- matrix(NA, nrow = 100, ncol = 4)

test <- plot(gam.fit1, select = 1)
result1[,1] <- test[[1]][["x"]]
result1[,2] <- test[[1]][["fit"]]
result1[,3] <- test[[1]][["fit"]] - test[[1]][["se"]]
result1[,4] <- test[[1]][["fit"]] + test[[1]][["se"]]
result1 <- result1[which(result1[,1] > -0.25 & result1[,1] < 0.25), ]

test <- plot(gam.fit2, select=1)
result2[,1] <- test[[1]][["x"]]
result2[,2] <- test[[1]][["fit"]]
result2[,3] <- test[[1]][["fit"]] - test[[1]][["se"]]
result2[,4] <- test[[1]][["fit"]] + test[[1]][["se"]]
result2 <- result2[which(result2[,1] > -0.25 & result2[,1] < 0.25), ]

test <- plot(gam.fit3, select=1)
result3[,1] <- test[[1]][["x"]]
result3[,2] <- test[[1]][["fit"]]
result3[,3] <- test[[1]][["fit"]] - test[[1]][["se"]]
result3[,4] <- test[[1]][["fit"]] + test[[1]][["se"]]
result3 <- result3[which(result3[,1] > -0.25 & result3[,1] < 0.25), ]

test <- plot(gam.fit4, select=1)
result4[,1] <- test[[1]][["x"]]
result4[,2] <- test[[1]][["fit"]]
result4[,3] <- test[[1]][["fit"]] - test[[1]][["se"]]
result4[,4] <- test[[1]][["fit"]] + test[[1]][["se"]]
result4 <- result4[which(result4[,1] > -0.25 & result4[,1] < 0.25), ]

pdf("GAM_coverage.pdf", width = 12, height = 8)
par(mar = c(5,6,1,2))
plot(x = result1[,1], y = result1[,2], type = "l", lwd = 2.5,
     ylim = c(-0.3, 0.5), ylab = "Partial Effect of GDP Growth Pressure on Coverage",
     xlim = c(-0.3, 0.3), col = "blue",
     xlab = "GDP Growth Pressure",
     main = "", 
     cex.lab = 1.6, cex.axis = 1.4)
points(x = result1[,1], y = result1[,3], type = "l", lty = "dashed", lwd = 1, col = "blue")
points(x = result1[,1], y = result1[,4], type = "l", lty = "dashed", lwd = 1, col = "blue")
points(x = result3[,1], y = result3[,2], type = "l", lwd = 2.5, col = "red")
points(x = result3[,1], y = result3[,3], type = "l", lty = "dashed", lwd = 1, col = "red")
points(x = result3[,1], y = result3[,4], type = "l", lty = "dashed", lwd = 1, col = "red")
legend(x = -0.28, y = 0.5, legend=c("Medical Insurance and Pension", "Unemployment Insurance"), col = c("red", "blue"), lty=1:1,  box.lty=0, lwd = 2:2:2, cex = 1.3)
dev.off()
